# Data wrangling
library(tidyverse)
# Plotting
library(cowplot)
library(plotly)
library(factoextra)
source('/home/jin/OneDrive/SGB/projects/functions.R')

Project description

Study summary

This is where I would write a summary of the study/project, e.g. how it came about, who is involved, the research question, and what types of data were delivered.

Objectives

  • List research objectives here
  • You can use an asterisk or a dash to make a list

Data preparation

Load data

For the demo, we’ll just use the mtcars dataset.

# Print the full mtcars data frame (32 car models x 11 numeric variables)
# so the raw values used in the rest of the report are visible.
mtcars
##                      mpg cyl  disp  hp drat    wt  qsec vs am gear carb
## Mazda RX4           21.0   6 160.0 110 3.90 2.620 16.46  0  1    4    4
## Mazda RX4 Wag       21.0   6 160.0 110 3.90 2.875 17.02  0  1    4    4
## Datsun 710          22.8   4 108.0  93 3.85 2.320 18.61  1  1    4    1
## Hornet 4 Drive      21.4   6 258.0 110 3.08 3.215 19.44  1  0    3    1
## Hornet Sportabout   18.7   8 360.0 175 3.15 3.440 17.02  0  0    3    2
## Valiant             18.1   6 225.0 105 2.76 3.460 20.22  1  0    3    1
## Duster 360          14.3   8 360.0 245 3.21 3.570 15.84  0  0    3    4
## Merc 240D           24.4   4 146.7  62 3.69 3.190 20.00  1  0    4    2
## Merc 230            22.8   4 140.8  95 3.92 3.150 22.90  1  0    4    2
## Merc 280            19.2   6 167.6 123 3.92 3.440 18.30  1  0    4    4
## Merc 280C           17.8   6 167.6 123 3.92 3.440 18.90  1  0    4    4
## Merc 450SE          16.4   8 275.8 180 3.07 4.070 17.40  0  0    3    3
## Merc 450SL          17.3   8 275.8 180 3.07 3.730 17.60  0  0    3    3
## Merc 450SLC         15.2   8 275.8 180 3.07 3.780 18.00  0  0    3    3
## Cadillac Fleetwood  10.4   8 472.0 205 2.93 5.250 17.98  0  0    3    4
## Lincoln Continental 10.4   8 460.0 215 3.00 5.424 17.82  0  0    3    4
## Chrysler Imperial   14.7   8 440.0 230 3.23 5.345 17.42  0  0    3    4
## Fiat 128            32.4   4  78.7  66 4.08 2.200 19.47  1  1    4    1
## Honda Civic         30.4   4  75.7  52 4.93 1.615 18.52  1  1    4    2
## Toyota Corolla      33.9   4  71.1  65 4.22 1.835 19.90  1  1    4    1
## Toyota Corona       21.5   4 120.1  97 3.70 2.465 20.01  1  0    3    1
## Dodge Challenger    15.5   8 318.0 150 2.76 3.520 16.87  0  0    3    2
## AMC Javelin         15.2   8 304.0 150 3.15 3.435 17.30  0  0    3    2
## Camaro Z28          13.3   8 350.0 245 3.73 3.840 15.41  0  0    3    4
## Pontiac Firebird    19.2   8 400.0 175 3.08 3.845 17.05  0  0    3    2
## Fiat X1-9           27.3   4  79.0  66 4.08 1.935 18.90  1  1    4    1
## Porsche 914-2       26.0   4 120.3  91 4.43 2.140 16.70  0  1    5    2
## Lotus Europa        30.4   4  95.1 113 3.77 1.513 16.90  1  1    5    2
## Ford Pantera L      15.8   8 351.0 264 4.22 3.170 14.50  0  1    5    4
## Ferrari Dino        19.7   6 145.0 175 3.62 2.770 15.50  0  1    5    6
## Maserati Bora       15.0   8 301.0 335 3.54 3.570 14.60  0  1    5    8
## Volvo 142E          21.4   4 121.0 109 4.11 2.780 18.60  1  1    4    2

Plots and Figures

Output from plotting functions is included.

# Bar chart of mpg, cyl, disp and hp for every car model.
# The four variables are stacked into long format (columns `name`/`value`)
# and drawn as one facet row per variable, each row with its own y scale.
mtcars %>%
  select(mpg, cyl, disp, hp) %>%
  rownames_to_column(var = 'Model') %>%
  pivot_longer(cols = -Model) %>%
  ggplot(aes(x = Model, y = value)) +
  geom_col() +
  facet_grid(rows = vars(name), scales = 'free') +
  # Rotate the model names so they do not overlap on the x axis.
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))

2D and 3D PCA plots

For .html output, you can include 3D interactive plots using the plotly package.

2D PCA

# PCA on all mtcars variables.
# The columns are on very different scales (e.g. disp is in the hundreds
# while wt is in single digits), so every variable is centred and scaled to
# unit variance; without scaling the large-magnitude columns would dominate
# the leading components.
res.pca <- prcomp(mtcars, center = TRUE, scale. = TRUE)

# 2D plot of the individuals (car models), coloured by number of cylinders,
# with an ellipse drawn per cylinder group and labels repelled from each
# other. The empty title suppresses the default plot title.
fviz_pca_ind(res.pca,
             col.ind = as.factor(mtcars$cyl),
             addEllipses = TRUE,
             legend.title = 'cyl',
             repel = TRUE,
             title = '')

3D PCA

# 3D PCA plot of the prcomp result, passing the cylinder count as a factor
# (presumably the grouping/colour variable).
# NOTE(review): plotly_pca() is not defined in this file; presumably it comes
# from the functions.R script sourced at the top — confirm its arguments there.
plotly_pca(res.pca, as.factor(mtcars$cyl))

Appendix

Code

/* Indentation of the tocify table of contents: every additional level of
   sub-header nesting gets 10px more left padding than the level above. */

/* Top-level headers. */
.tocify-header {
  text-indent: initial;
}

/* First level of sub-headers. */
.tocify-subheader > .tocify-item {
  text-indent: initial;
  padding-left: 20px;
}

/* Second level of sub-headers. */
.tocify-subheader .tocify-subheader > .tocify-item {
  text-indent: initial;
  padding-left: 30px;
}

/* Third level of sub-headers. */
.tocify-subheader .tocify-subheader .tocify-subheader > .tocify-item {
  text-indent: initial;
  padding-left: 40px;
}

/* Fourth level of sub-headers. This selector needs four .tocify-subheader
   ancestors; repeating the three-level selector here would override the
   40px rule above and leave the fourth level without its own indent. */
.tocify-subheader .tocify-subheader .tocify-subheader .tocify-subheader > .tocify-item {
  text-indent: initial;
  padding-left: 50px;
}
# Default options applied to every code chunk in the report.
knitr::opts_chunk$set(
  echo       = TRUE,   # show chunk source code
  include    = TRUE,   # keep chunk code and output in the rendered document
  dpi        = 150,    # figure resolution
  cache      = FALSE,  # no chunk caching
  cache.lazy = FALSE   # no lazy-loading of cached objects
)
# Data wrangling
library(tidyverse)
# Plotting
library(cowplot)
library(plotly)
library(factoextra)
source('/home/jin/OneDrive/SGB/projects/functions.R')
# Print the full mtcars data frame (32 car models x 11 numeric variables)
# so the raw values used in the rest of the report are visible.
mtcars
# Bar chart of mpg, cyl, disp and hp for every car model.
# The four variables are stacked into long format (columns `name`/`value`)
# and drawn as one facet row per variable, each row with its own y scale.
mtcars %>%
  select(mpg, cyl, disp, hp) %>%
  rownames_to_column(var = 'Model') %>%
  pivot_longer(cols = -Model) %>%
  ggplot(aes(x = Model, y = value)) +
  geom_col() +
  facet_grid(rows = vars(name), scales = 'free') +
  # Rotate the model names so they do not overlap on the x axis.
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))
# PCA on all mtcars variables.
# The columns are on very different scales (e.g. disp is in the hundreds
# while wt is in single digits), so every variable is centred and scaled to
# unit variance; without scaling the large-magnitude columns would dominate
# the leading components.
res.pca <- prcomp(mtcars, center = TRUE, scale. = TRUE)

# 2D plot of the individuals (car models), coloured by number of cylinders,
# with an ellipse drawn per cylinder group and labels repelled from each
# other. The empty title suppresses the default plot title.
fviz_pca_ind(res.pca,
             col.ind = as.factor(mtcars$cyl),
             addEllipses = TRUE,
             legend.title = 'cyl',
             repel = TRUE,
             title = '')
# 3D PCA plot of the prcomp result, passing the cylinder count as a factor
# (presumably the grouping/colour variable).
# NOTE(review): plotly_pca() is not defined in this file; presumably it comes
# from the functions.R script sourced at the top — confirm its arguments there.
plotly_pca(res.pca, as.factor(mtcars$cyl))

# Print the R version, platform, locale and attached/loaded package versions
# used for this render, for reproducibility.
sessionInfo()

R session info

# Print the R version, platform, locale and attached/loaded package versions
# used for this render, for reproducibility.
sessionInfo()
## R version 4.1.3 (2022-03-10)
## Platform: x86_64-redhat-linux-gnu (64-bit)
## Running under: CentOS Stream 9
## 
## Matrix products: default
## BLAS/LAPACK: /usr/lib64/libopenblas-r0.3.15.so
## 
## locale:
##  [1] LC_CTYPE=en_CA.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_CA.UTF-8        LC_COLLATE=en_CA.UTF-8    
##  [5] LC_MONETARY=en_CA.UTF-8    LC_MESSAGES=en_CA.UTF-8   
##  [7] LC_PAPER=en_CA.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_CA.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] factoextra_1.0.7 plotly_4.10.0    cowplot_1.1.1    forcats_0.5.1   
##  [5] stringr_1.4.0    dplyr_1.0.9      purrr_0.3.4      readr_2.1.2     
##  [9] tidyr_1.2.0      tibble_3.1.8     ggplot2_3.3.6    tidyverse_1.3.2 
## 
## loaded via a namespace (and not attached):
##  [1] httr_1.4.3          sass_0.4.2          jsonlite_1.8.0     
##  [4] viridisLite_0.4.0   carData_3.0-5       modelr_0.1.8       
##  [7] bslib_0.4.0         assertthat_0.2.1    highr_0.9          
## [10] googlesheets4_1.0.0 cellranger_1.1.0    yaml_2.3.5         
## [13] ggrepel_0.9.1       pillar_1.8.0        backports_1.4.1    
## [16] glue_1.6.2          digest_0.6.29       ggsignif_0.6.3     
## [19] rvest_1.0.2         colorspace_2.0-3    htmltools_0.5.3    
## [22] pkgconfig_2.0.3     broom_1.0.0         haven_2.5.0        
## [25] scales_1.2.0        tzdb_0.3.0          googledrive_2.0.0  
## [28] car_3.1-0           generics_0.1.3      farver_2.1.1       
## [31] ellipsis_0.3.2      ggpubr_0.4.0        cachem_1.0.6       
## [34] withr_2.5.0         lazyeval_0.2.2      cli_3.3.0          
## [37] magrittr_2.0.3      crayon_1.5.1        readxl_1.4.0       
## [40] evaluate_0.15       fs_1.5.2            fansi_1.0.3        
## [43] rstatix_0.7.0       xml2_1.3.3          tools_4.1.3        
## [46] data.table_1.14.2   hms_1.1.1           gargle_1.2.0       
## [49] lifecycle_1.0.1     munsell_0.5.0       reprex_2.0.1       
## [52] compiler_4.1.3      jquerylib_0.1.4     rlang_1.0.4        
## [55] grid_4.1.3          rstudioapi_0.13     htmlwidgets_1.5.4  
## [58] crosstalk_1.2.0     labeling_0.4.2      rmarkdown_2.14     
## [61] gtable_0.3.0        abind_1.4-5         DBI_1.1.3          
## [64] R6_2.5.1            lubridate_1.8.0     knitr_1.39         
## [67] fastmap_1.1.0       utf8_1.2.2          stringi_1.7.8      
## [70] Rcpp_1.0.9          vctrs_0.4.1         dbplyr_2.2.1       
## [73] tidyselect_1.1.2    xfun_0.31